-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
make demos_pool a local var rather than a separate stream #1436
Conversation
781f19c
to
850a7c0
Compare
a53f14e
to
95a9560
Compare
if item["input_fields"] == instance["input_fields"]: | ||
not_selected_from_from_stream.append(instance) | ||
break | ||
demos_pool.append(instance) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
demos_pool.append(instance) | |
demos_pool.append(instance) | |
demos_pool_set.add(str(insance["input_fields"])) |
src/unitxt/standard.py
Outdated
|
||
if len(demos_pool) < self.demos_pool_size: | ||
raise ValueError( | ||
f"Unable to fetch enough ({self.demos_pool_size}) instances from stream {self.from_stream} for the demos_pool" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
f"Unable to fetch enough ({self.demos_pool_size}) instances from stream {self.from_stream} for the demos_pool" | |
f"Unable to fetch enough ({self.demos_pool_size}) instances from stream {self.from_stream} for the demos_pool, condsider increasing loader_limit or less strict stream filtering" |
src/unitxt/standard.py
Outdated
@@ -28,8 +40,72 @@ | |||
|
|||
|
|||
# Used to give meaningful name to recipe steps | |||
class CreateDemosPool(SeparateSplit): | |||
pass | |||
class CreateDemosPoolAndSpreadDemos(MultiStreamOperator): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
class CreateDemosPoolAndSpreadDemos(MultiStreamOperator): | |
class CreateDemosPoolAndAssignDemosToInstaces(MultiStreamOperator): |
src/unitxt/standard.py
Outdated
|
||
if self.sample is None: | ||
return MultiStream.from_generators(new_streams) | ||
self.sample.demos_pool = demos_pool |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.sample.demos_pool = demos_pool | |
self.sample.set_demos_pool(demos_pool) |
src/unitxt/splitters.py
Outdated
class Sample(InstanceOperatorWithMultiStreamAccess): | ||
from_stream: str | ||
class Sample(InstanceOperator): | ||
demos_pool: List[Dict[str, Any]] = None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
demos_pool: List[Dict[str, Any]] = None |
@@ -350,30 +350,21 @@ def get_sample_size(self, instance) -> int: | |||
pass | |||
|
|||
def process( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
def process( | |
@property | |
def is_demos_pool_set(self, instance): | |
return "demos_pool" in instance and instance["demos_pool"] is not None and isinstance(instance["demos_pool"], list) and len(instance["demos_pool"]) > self.get_sample_size(instance) | |
def process( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Split operators to to CreateDemosPool and AssignDemosToInstances
- Allow having pre made "demos_pool" or "demos" in instances
- Check that every instance contain only pointer to the demos pool and no copy of it (god forbid)
03ffdde
to
ccebc83
Compare
src/unitxt/standard.py
Outdated
from_stream: str = None | ||
demos_pool_size: int = None | ||
remove_targets_from_source_split: bool = None | ||
to_field: str = "_demos_pool_" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to_field: str = "_demos_pool_" | |
to_field: str = constants.demos_pool_field |
src/unitxt/standard.py
Outdated
@@ -59,14 +239,18 @@ class BaseRecipe(Recipe, SourceSequentialOperator): | |||
test_refiner: StreamRefiner = OptionalField(default_factory=StreamRefiner) | |||
|
|||
demos_pool_size: int = None | |||
given_demos_pool: List[Dict[str, Any]] = None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
given_demos_pool: List[Dict[str, Any]] = None | |
demos_pool: List[Dict[str, Any]] = None |
|
||
constants = get_constants() | ||
settings = get_settings() | ||
logger = get_logger() | ||
|
||
|
||
# Used to give meaningful name to recipe steps | ||
class CreateDemosPool(SeparateSplit): | ||
pass | ||
class CreateDemosPool(MultiStreamOperator): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
class CreateDemosPool(MultiStreamOperator): | |
class AddDemosPool(Set): | |
field = constants.demos_pool_field | |
class CreateDemosPool(MultiStreamOperator): |
tests/library/test_recipe.py
Outdated
recipe2 = DatasetRecipe( | ||
card="cards.wnli", | ||
template_card_index=0, | ||
given_demos_pool=for_demos, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
given_demos_pool=for_demos, | |
given_demos_pool=[ | |
{"text": "hello world", "label": "pre"} # task complient dicts | |
], |
24342d7
to
9d377d6
Compare
140fddd
to
392b5e3
Compare
…estart train repeatedly. Fix needed in Loaders. Allow a given demos_pool, or input stream-instances already loaded with demos Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
1d6502e
to
2ffb0f1
Compare
Signed-off-by: dafnapension <[email protected]>
Signed-off-by: dafnapension <[email protected]>
Ensures pool of demo instances is made of distinct items, and suffices for each ordinary instance, even if that ordinary instance is identical to one of the demo instances in the pool (see issue #1416).
Demo instances do not constitute a stream, thereby do not get loaded with demo instances by themselves (see issue #1210)
Extended functionality: Allow an input of a demos_pool, or an input in which (some or all) instances are already loaded each with demos